* The project root directory is passed in as the first argument of this do-file.
global root_dir = "`1'"

* Load the project configuration.
* NOTE(review): presumably this defines the *_dir globals used below -- confirm in config.do.
include "$root_dir/code/config/config.do"

* Open the main log of this script under the name "fig".
* The path is quoted so that directories containing spaces are handled correctly.
cap noi log using "${log_dir}/figure_1_ipc6xx_histogram.log", replace name(fig)

capture noi {

    * Import the classification file and flag the machinery IPC codes.
    * The resulting data set is the input of the histogram drawn further below.
    * -import delimited, clear- already replaces the data in memory, so no
    * separate -clear- is needed beforehand.
    import delim "${classification_dir}/V6/ipc6XX_tf.csv", varnames(1) clear

    * a) identify machinery technical fields
    gen machinery_field = techn_sector == "Mechanical engineering" ///
        & inlist(techn_field, "Handling", "Machine tools", ///
            "Other special machines", "Textile and paper machines")

    * Prefixes of the IPC code used by the manual corrections below
    gen ipc3 = substr(ipc6, 1, 3)
    gen ipc4 = substr(ipc6, 1, 4)
    * 1. Exclude F41 and F42: weapons and ammunition
    replace machinery_field = 0 if ipc3 == "F41" | ipc3 == "F42"
    * 2. Include B42C: machines for book production
    replace machinery_field = 1 if ipc4 == "B42C"
    * 3. Include B07C: machines for postal sorting
    replace machinery_field = 1 if ipc4 == "B07C"
    * 4. Include G05B19 and G05B2219: programme-control systems
    replace machinery_field = 1 if ipc6 == "G05B19"
    replace machinery_field = 1 if ipc6 == "G05B2219"
    * 5. Include B62D65: engine manufacturing
    replace machinery_field = 1 if ipc6 == "B62D65"

    * Y codes in the CPC classification are organized differently -> ignore them.
    * The assert stops the script if any Y code was flagged as machinery.
    gen ipc1 = substr(ipc6, 1, 1)
    assert machinery_field == 0 if ipc1 == "Y"
    tab techn_field if machinery_field == 1

    * Drop the descriptions of non-machinery technical fields;
    * Y codes get their own "non-classified" label.
    replace techn_field = "non-machinery" if machinery_field == 0
    replace techn_field = "non-classified" if ipc1 == "Y"
    drop ipc1 ipc3 ipc4

    * Log the imported threshold values to a separate "numbers" log.
    cap log using "${numb_dir}/figure_1_ipc6xx_histogram_numbers.log", replace name(numbers)

    * Get the thresholds for auto95 and auto90 (per the original note: based on
    * non-Y machinery codes with n >= 100).
    * Caveat from the original author: this threshold is "wrong" in the sense that
    * Stata calculates percentiles slightly differently than Python.
    * TODO: build a bridge mechanism that supplies the correct value when both
    * packages are executed. Look at the Python logs for the correct one. The
    * difference is not huge (only one more ipc4 pair gets thrown out), but still.
    *
    * For each level the first line of the threshold file is read and everything
    * after the marker "thresh=" is converted to a number. The loop defines the locals
    *   thresh_auto95,   thresh_auto90     raw thresholds
    *   thresh_auto95_r, thresh_auto90_r   thresholds formatted with %4.2f
    *   text_tauto95,    text_tauto90      threshold minus .03
    * NOTE(review): the text_tauto locals are not used in this file; they are kept
    * because the -include- of figuretools.do below shares this local scope.
    foreach lvl in 95 90 {
        * Release a handle left open by an earlier, aborted run in this session.
        capture file close thresh_fh
        file open thresh_fh using "${root_dir}/classification/datasets/V6/auto`lvl'_ipc6XX.txt", read
        file read thresh_fh line
        file close thresh_fh

        * Fail loudly when the marker is absent instead of parsing an arbitrary substring.
        local thresh_pos = strpos(`"`line'"', "thresh=")
        if `thresh_pos' == 0 {
            di as error "No thresh= entry found in the first line of auto`lvl'_ipc6XX.txt"
            error 459
        }
        * Skip the 7 characters of the marker itself.
        local thresh_pos = `thresh_pos' + 7
        local thresh_value = substr(`"`line'"', `thresh_pos', .)
        local thresh_auto`lvl' = real(`"`thresh_value'"')
        if missing(`thresh_auto`lvl'') {
            di as error "Could not convert the thresh= entry of auto`lvl'_ipc6XX.txt to a number"
            error 459
        }
        di "Imported threshold auto`lvl' value: `thresh_auto`lvl''"

        local thresh_auto`lvl'_r: di %4.2f `thresh_auto`lvl''
        di "`thresh_auto`lvl'_r'"
        local text_tauto`lvl' = `thresh_auto`lvl'' - .03
    }
    cap log close numbers

    * Load the shared figure settings. -include- runs the file in this script's
    * local scope.
    * NOTE(review): the colour locals crm4 and crm6 used below are presumably
    * defined in figuretools.do -- confirm.
    qui include "${code_dir}/config/figuretools.do"

    * Histogram with shading: three overlaid histograms of share_anyclassification
    * with a common bin width of 0.01:
    *   1. below the auto90 threshold                  (grey, gs8)
    *   2. from the auto90 up to the auto95 threshold  (colour crm6)
    *   3. at or above the auto95 threshold            (colour crm4)
    * The second and third histogram start their bins at the respective rounded
    * threshold; dashed vertical lines mark both thresholds.
    tw (histogram share_anyclassification if share_anyclassification < `thresh_auto90_r', start(0) width(0.01) lcolor(white) fcolor(gs8) lwidth(0.25pt) freq) ///
            (histogram share_anyclassification if (share_anyclassification >= `thresh_auto90_r') & (share_anyclassification < `thresh_auto95_r'), start(`thresh_auto90_r') width(0.01) lcolor(white) fcolor("`crm6'") lwidth(0.25pt) freq) ///
            (histogram share_anyclassification if share_anyclassification >= `thresh_auto95_r', start(`thresh_auto95_r') width(0.01) lcolor(white) lwidth(0.25pt) fcolor("`crm4'") freq), ///
            xtitle("{stSerif: Prevalence of automation keywords}") ytitle("{stSerif: Frequency}") ///
            legend(off) yscale(r(0 .)) ///
            text(75 .455 "{stSerif: Auto90}", color("`crm6'") fcolor(white) bexpand j(left)) text(75 .7 "{stSerif: Auto95}", color("`crm4'")) ///
            plotregion(margin(b=0 l=0)) ylab(#3) ///
            xline(`thresh_auto90_r' `thresh_auto95_r', lpattern(dash) lcolor(black))

    * Export the figure as PDF and EPS; the paths are quoted so that directories
    * containing spaces are handled correctly.
    graph export "${fig_dir}/main/Figure_1_ipc6xx_histogram.pdf", as(pdf) replace
    graph export "${fig_dir}/main/fig1.eps", as(eps) replace

}
* _rc holds the return code of the capture block above: 0 means that every
* command inside it ran without error.
if _rc == 0 {
    display "Execution finished successfully."
}
else {
    display "Execution finished with errors."
}

* If the capture block aborted while the "numbers" log was open, that log is
* still open here; close it so a rerun in the same session starts clean.
* This happens only after _rc has been inspected, because -capture- resets _rc.
cap log close numbers
cap log close fig